# Load packages.library(tidyverse)library(rethinking)sigma_alpha <-1# Intercept standard deviation.sigma_beta <-0.5# Slope standard deviation.rho <--0.7# Correlation between parameter types.# Multivariate covariance decomposed into scale and correlation.sigmas <-c(sigma_alpha, sigma_beta)Rho <-matrix(c(1, rho, rho, 1), nrow =2)# Matrix multiply to get the covariance matrix.Sigma <-diag(sigmas) %*% Rho %*%diag(sigmas)
quad_form_diag()
diag(sigmas) # Scale
[,1] [,2]
[1,] 1 0.0
[2,] 0 0.5
Rho # %*% Correlation Matrix
[,1] [,2]
[1,] 1.0 -0.7
[2,] -0.7 1.0
diag(sigmas) # %*% Scale
[,1] [,2]
[1,] 1 0.0
[2,] 0 0.5
Sigma # = Covariance Matrix
[,1] [,2]
[1,] 1.00 -0.35
[2,] -0.35 0.25
Cholesky Decomposition
We can take any square, symmetric matrix R and decompose it into its Cholesky factor L such that:
Rho
[,1] [,2]
[1,] 1.0 -0.7
[2,] -0.7 1.0
t(chol(Rho)) %*%chol(Rho)
[,1] [,2]
[1,] 1.0 -0.7
[2,] -0.7 1.0
Centered Multilevel Regression
// Index values, observations, and covariates.data {int<lower = 1> N; // Number of observations.int<lower = 1> K; // Number of groups/clusters.int<lower = 1> I; // Number of observation-level covariates.vector[N] y; // Vector of observations.array[N] int<lower = 1, upper = K> g; // Vector of group assignments.matrix[N, I] X; // Matrix of observation-level covariates.}// Parameters.parameters {vector[I] gamma; // Vector of population-level parameters.corr_matrix[I] Omega; // Population model correlation matrix.vector<lower = 0>[I] tau; // Population model scale parameters. matrix[K, I] Beta; // Matrix of observation-level parameters.real<lower = 0> sigma; // Variance of the observation model.}// Regression model.model {// Hyperpriors. gamma ~ normal(0, 5); Omega ~ lkj_corr(4); tau ~ normal(0, 5);// Prior. sigma ~ normal(0, 5);// Population model.for (k in1:K) { Beta[k,] ~ multi_normal(gamma, quad_form_diag(Omega, tau)); }// Observation model.for (n in1:N) { y[n] ~ normal(X[n,] * Beta[g[n],]', sigma); }}// Generate quantities at each draw.generated quantities {// Compute the log-likelihood.vector[N] log_lik;for (n in1:N) { log_lik[n] = normal_lpdf(y[n] | X[n,] * Beta[g[n],]', sigma); }}
Non-Centered Parameterization
Non-Centered Multilevel Regression
// Index values, observations, and covariates.data {int<lower = 1> N; // Number of observations.int<lower = 1> K; // Number of groups/clusters.int<lower = 1> I; // Number of observation-level covariates.vector[N] y; // Vector of observations.array[N] int<lower = 1, upper = K> g; // Vector of group assignments.matrix[N, I] X; // Matrix of observation-level covariates.}// Parameters.parameters {vector[I] gamma; // Vector of population-level parameters.cholesky_factor_corr[I] L_Omega; // Population model correlation matrix Cholesky factor.vector<lower = 0>[I] tau; // Population model scale parameters. matrix[I, K] Delta; // Matrix of non-centered observation-level parameters.real<lower = 0> sigma; // Variance of the observation model.}// Transformed parameters.transformed parameters {matrix[K, I] V; // Matrix of rescaled non-centered observation-level parameters.matrix[K, I] Beta; // Matrix of observation-level parameters.// Compute the non-centered parameterization. V = (diag_pre_multiply(tau, L_Omega) * Delta)';for (i in1:I) { Beta[,i] = gamma[i] + V[,i]; }}// Regression model.model {// Hyperpriors. gamma ~ normal(0, 5); L_Omega ~ lkj_corr_cholesky(4); tau ~ normal(0, 5);// Prior. sigma ~ normal(0, 5);// Population model. to_vector(Delta) ~ normal(0, 1);// Observation model.for (n in1:N) { y[n] ~ normal(X[n,] * Beta[g[n],]', sigma); }}// Generate quantities at each draw.generated quantities {// Compute the log-likelihood.vector[N] log_lik;for (n in1:N) { log_lik[n] = normal_lpdf(y[n] | X[n,] * Beta[g[n],]', sigma); }// Compute the correlation matrix Omega.corr_matrix[I] Omega; Omega = multiply_lower_tri_self_transpose(L_Omega);}
Including Group Covariates
“Unknown” and “Known” Heterogeneity
Having a population model allows us to characterize unknown heterogeneity across the population such that the varying effects are drawn from the given population.
// Population model.
for (k in 1:K) {
Beta[k,] ~ multi_normal(gamma, quad_form_diag(Omega, tau));
}
If we also have data that might help explain that heterogeneity, we can introduce a second linear model that models known heterogeneity.
// Population model.
for (k in 1:K) {
Beta[k,] ~ multi_normal(Z[k,] * Gamma, quad_form_diag(Omega, tau));
}
Non-Centered Multilevel Regression with Population-Level Covariates
// Index values, hyperprior values, observations, and covariates.data {int<lower = 1> N; // Number of observations.int<lower = 1> K; // Number of groups/clusters.int<lower = 1> I; // Number of observation-level covariates.int<lower = 1> J; // Number of population-level covariates.real Gamma_mean; // Mean of population-level means.real<lower = 0> Gamma_scale; // Scale of population-level means.real<lower = 0> Omega_shape; // Shape of population-level scale.real tau_mean; // Mean of population-level scale.real<lower = 0> tau_scale; // Scale of population-level scale.real sigma_mean; // Mean of observation-level variance.real<lower = 0> sigma_scale; // Scale of observation-level variance.vector[N] y; // Vector of observations.array[N] int<lower = 1, upper = K> g; // Vector of group assignments.matrix[N, I] X; // Matrix of observation-level covariates.matrix[K, J] Z; // Matrix of population-level covariates.}// Parameters.parameters {matrix[J, I] Gamma; // Vector of population-level parameters.cholesky_factor_corr[I] L_Omega; // Population model correlation matrix Cholesky factor.vector<lower = 0>[I] tau; // Population model scale parameters. matrix[I, K] Delta; // Matrix of non-centered observation-level parameters.real<lower = 0> sigma; // Variance of the observation model.}// Transformed parameters.transformed parameters {matrix[K, I] V; // Matrix of rescaled non-centered observation-level parameters.matrix[K, I] Beta; // Matrix of observation-level parameters.// Compute the non-centered parameterization. V = (diag_pre_multiply(tau, L_Omega) * Delta)';for (i in1:I) { Beta[,i] = Z * Gamma[,i] + V[,i]; }}// Regression model.model {// Hyperpriors. to_vector(Gamma) ~ normal(Gamma_mean, Gamma_scale); L_Omega ~ lkj_corr_cholesky(Omega_shape); tau ~ normal(tau_mean, tau_scale);// Prior. sigma ~ normal(sigma_mean, sigma_scale);// Population model. to_vector(Delta) ~ normal(0, 1);// Observation model.for (n in1:N) { y[n] ~ normal(X[n,] * Beta[g[n],]', sigma); }}// Generate quantities at each draw.generated quantities {// Compute the log-likelihood.vector[N] log_lik;for (n in1:N) { log_lik[n] = normal_lpdf(y[n] | X[n,] * Beta[g[n],]', sigma); }// Compute the correlation matrix Omega.corr_matrix[I] Omega; Omega = multiply_lower_tri_self_transpose(L_Omega);}